length_ref_val <- read_xlsx('/Users/domingpa/Documents/Github Repositories/Chinook_energetics/R/output/GAM_reference_values.xlsx')
head(length_ref_val)
## # A tibble: 6 × 3
## region age ref_mean
## <chr> <dbl> <dbl>
## 1 CECR 1 612.
## 2 CECR 2 707.
## 3 CECR 3 839.
## 4 CECR 4 918.
## 5 CECR 5 906.
## 6 FRTH 1 622.
gam_predictions <- read_xlsx('/Users/domingpa/Documents/Github Repositories/Chinook_energetics/R/output/GAM_predictions.xlsx')
head(gam_predictions)
## # A tibble: 6 × 7
## region age year fit se.fit ref_mean rel_pred
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CECR 1 1980 600. 10.1 612. -1.97
## 2 CECR 1 1981 599. 8.89 612. -2.15
## 3 CECR 1 1982 598. 7.85 612. -2.34
## 4 CECR 1 1983 597. 6.99 612. -2.52
## 5 CECR 1 1984 595. 6.31 612. -2.71
## 6 CECR 1 1985 594. 5.78 612. -2.91
colnames(gam_predictions)[2:3] <- c("ocean_age","brood_year")
| region | brood_year | 1 | 2 | 3 | 4 | 5 |
|---|---|---|---|---|---|---|
| CECR | 2008 | 614.2402 | 706.8939 | 833.1406 | 906.277004042424 | NA |
| CECR | 2009 | 615.5813 | 706.5605 | 828.6538 | 898.200344675202 | NA |
| CECR | 2010 | 616.9442 | 706.2197 | 824.0969 | 890.041501740687 | NA |
| FRTH | 1980 | 567.1555 | 673.5965 | 845.8200 | NA | NA |
| FRTH | 1981 | 569.8844 | 674.9115 | 847.4117 | NA | NA |
| FRTH | 1982 | 572.5491 | 676.2243 | 849.0288 | NA | NA |
| FRTH | 1983 | 575.1021 | 677.5355 | 850.6648 | NA | NA |
| FRTH | 1984 | 577.4986 | 678.8474 | 852.3196 | NA | NA |
| FRTH | 1985 | 579.7072 | 680.1636 | 853.9999 | NA | NA |
| FRTH | 1986 | 581.7178 | 681.4891 | 855.7181 | NA | NA |
| FRTH | 1987 | 583.5443 | 682.8299 | 857.4908 | NA | NA |
In this case, the NAs belonging to the CECR 2008-2010 and FRTH 1980 - 1987 at age 5 are due to the ‘rel_pred’ and ‘reference size’ value being missing at this age.
Let’s check where the NAs are
print(NA_size_at_age)
## # A tibble: 23 × 2
## # Groups: region [21]
## region ocean_age
## <chr> <dbl>
## 1 CECR 5
## 2 FRTH 4
## 3 FRTH 5
## 4 GRAY 1
## 5 GRAY 5
## 6 GST 5
## 7 HOOD 5
## 8 JNST 5
## 9 JUAN 5
## 10 KLTR 5
## # ℹ 13 more rows
All regions have at least one missing value at ‘age-5’, and only the ‘GRAY’ region has missing values at ages 1 and 5. Since the FRAM abundance database does not contain information for age 1, the missing values at ‘age-1’ will not be relevant in the next calculations.
But before filling the NAs, let’s see how the data looks so far.
To fill the NAs, I tackled this issue based on two
assumptions:
1) If there was data available for age 5, I kept the ‘fitted lengths’, ‘se fitted lengths’, ‘reference means’ and “rel_pred” constant for the previous years relative to the last year with information.
2) For the years for age 5 without the ‘fitted lengths’, ‘se fitted lengths’, ‘reference means’ and “rel_pred”, the values were taken relative to age 4.
Additionally, JUAN-age 5 is removed since the size changes were estimated from few samples, resulting in a dramatic size decrease. The age-5 sizes were calculated based nder assumption # 2.
gam_predictions <- gam_predictions%>%
filter(!(region == 'JUAN' & ocean_age == 5))
full_gam_predictions <- check.grid%>%
left_join(gam_predictions, by = c("region","brood_year","ocean_age"))%>%
group_by(region, ocean_age)%>%
arrange(region,ocean_age)%>%
fill(fit, se.fit, ref_mean, rel_pred, .direction = 'down')%>% #Fill NAs under assumption 1
ungroup()%>%
group_by(region, brood_year)%>%
arrange(region,brood_year)%>%
fill(fit, se.fit, ref_mean, rel_pred, .direction = 'down')%>% #Fill NAs under assumption 2
ungroup()
avg_size_at_year <- full_gam_predictions%>%
group_by(ocean_age,brood_year)%>%
summarise(avg.size = mean(fit, na.rm = TRUE))%>%
ungroup()
Chinook salmon size over brood year - Each line is a region and
color indicates ocean age
Chinook salmon size changes divided by ocean age - Color
indicates region and black line is the mean size trend for each age
Size changes over brood year for each region - Color indicates
ocean age
I manually organized this database in Excel. O’neill et al. 2014, calculated the lipid tier for each stock at the FRAM stock level, and Ohlberger et al. 2018 estimated the salmon stock lengths at the RMIS level. Hence, I matched FRAM stocks to RMIS regions by overlaying the FRAM stock origin river with the RMIS atlas.
lipid_ranking_params <-read_xlsx(here("R","data","SRKW_prey_kcal.xlsx"), sheet = 12) #Parameters to calculate the lipid content relative to lipid ranking and length
lipid_ranking_stock <- read_xlsx(here("R","data","SRKW_prey_kcal.xlsx"), sheet = 9) #Tab 'Lipid calls clean'
lipid_ranking_stock <- lipid_ranking_stock%>%distinct(FRAM.long.names,RMIS.Region, .keep_all = TRUE) #Collapsing by FRAM Stock and RMIS regions since the tier does not change among age classes.
lipid_ranking_stock <- lipid_ranking_stock[-6] #Remove 'Age' column
#Lipid rankings were assigned only to 'Marked' individuals. However, as the abundance database contains information on both marked and unmarked stocks,
#we assume that marked and unmarked animals from the same stock belong to the same lipid category.
lipid_ranking_stock_unmarked <- lipid_ranking_stock%>%mutate(FRAM.long.names = paste0("Un",FRAM.long.names))
lipid_ranking_stock <- rbind(lipid_ranking_stock,lipid_ranking_stock_unmarked)
lipid_ranking_stock$main.id <- c(1:length(lipid_ranking_stock$main.id)) #Re-generate main.id.
stock_abundance <- read_csv(here("R","data","Cohort_Stock_Shelton_seasons.csv"))
stock_abundance <- left_join(stock_abundance,lipid_ranking_stock, by = c('StockLongName'='FRAM.long.names'))
The following stocks did not have lipid tier and were assigned to the ‘medium’ category:
“Unmarked Nooksack Spr Natural” “Marked Nooksack Spr Hatchery,” and “Unmarked Nooksack Spr Hatchery,” which belong to the NOWA RMIS region.
Additionally, the stocks “Unmarked Mid Oregon Coast Fall” and “Marked Mid Oregon Coast Fall” were also assigned to the ‘medium’ lipid tier and are associated with the NOOR RMIS region.
stock_abundance <- stock_abundance%>%
mutate(Lipid.Ranking.fix = if_else(is.na(`Lipid Ranking`),'medium',`Lipid Ranking`),
RMIS.Region.fix = case_when(
StockLongName %in% c("UnMarked Nooksack Spr Natural", "Marked Nooksack Spr Hatchery", "UnMarked Nooksack Spr Hatchery") ~ "NOWA",
StockLongName %in% c("UnMarked Mid Oregon Coast Fall", "Marked Mid Oregon Coast Fall") ~ "NOOR",
TRUE ~ RMIS.Region),
FRAM.names.fix = str_remove(StockLongName, "^(Marked |UnMarked )"))
The ‘Cohort’ year is necessary since the size estimates database needs to be linked to the FRAM abundance database. This way, it is possible to calculate the total lipids per stock/ocean age/year.
The assumption here is that the Cohort year (or Run year) is calculated as:
\[\text{Cohort} = \text{Brood year} +
\text{Freshwater residence} + \text{Ocean age}\] The
freshwater age is estimated as follows: fish returning in
spring (March-June) are 2 years old in freshwater (FW age-2), whereas
fish returning in summer and fall (July-November) are 1 year old in
freshwater (FW age-1).
We could derive the Brood year as:
\[\text{Brood year}\ = \text{Cohort} - \text{Freshwater residence} - \text{Ocean age}\]
stock_fw_age <- read_xlsx(here("R","data","FRAM stock_season run and fw age.xlsx"), sheet = 1)
stock_fw_age2 <- stock_fw_age%>%mutate(FRAM.long.names = paste0("Un",FRAM.long.names)) #The catalog has only 'Marked' stocks.
stock_fw_age_full <- rbind(stock_fw_age,stock_fw_age2)
stock_fw_age_full <- stock_fw_age_full[c(1,3)]
stock_abundance <-stock_abundance%>%
left_join(stock_fw_age_full, by = c("StockLongName"="FRAM.long.names"))
sum(is.na(stock_abundance$Freshwater.age)) #3487
## [1] 3487
#Identify stocks missing Freshwater age
stock.missing.fw.age <- stock_abundance %>%
filter(is.na(Freshwater.age)) %>%
distinct(StockLongName)
#Add Freshwater age to stocks
stock.missing.fw.age <- stock.missing.fw.age %>%
mutate(Freshwater.age = case_when(
str_detect(StockLongName, "Fall") ~ 1,
str_detect(StockLongName, "Spr") ~ 2,
TRUE ~ NA_real_ # Si no es Fall ni Spr, asigna NA
))
#colnames(stock.missing.fw.age)[1] <- "StockLongName"
#Add the new info to the abundance database
stock_abundance <- stock_abundance%>%
left_join(stock.missing.fw.age, by = "StockLongName") %>% # Unimos las tablas por StockLongName
mutate(Freshwater.age = if_else(is.na(Freshwater.age.x), Freshwater.age.y, Freshwater.age.x))%>%
select(-Freshwater.age.y, -Freshwater.age.x)
stock_abundance <- stock_abundance%>%
mutate(Brood.year = Year.run - Freshwater.age - Age)
Now the abundance and the size predictions databases are linked by ‘RMIS region’, ‘brood year’ and ‘ocean age’
temp <- stock_abundance%>%
select(StockID, Age, StartCohort,Shelton.TimeStep,Year.run, Season.run, StockLongName,StockName = FRAM.names.fix, RMIS.Region.fix, Lipid.Ranking.fix,Brood.year)%>%
left_join(full_gam_predictions, by = c("RMIS.Region.fix"='region','Brood.year'='brood_year','Age'='ocean_age'))
SPS has abundance data but not size estimates. Therefore, it is assumed that the lengths of SPS salmon are similar and follow trends comparable to those from nearby areas, such as HOOD.
## region
## 1 SPS
sps_values <- full_gam_predictions %>%
filter(region == "HOOD")%>%
mutate(region = "SPS")
# Step 2: Fill missing SPS values with HOOD values
stock_abundance <- temp %>%
left_join(sps_values, by = c('RMIS.Region.fix'='region',"Brood.year"='brood_year', "Age"='ocean_age'), suffix = c("", ".sps")) %>% # Merge SPS's values
mutate(
fit = if_else(RMIS.Region.fix == "SPS" & is.na(fit), fit.sps, fit),
se.fit = if_else(RMIS.Region.fix == "SPS" & is.na(se.fit), se.fit.sps, se.fit),
ref_mean = if_else(RMIS.Region.fix == "SPS" & is.na(ref_mean), ref_mean.sps, ref_mean),
rel_pred = if_else(RMIS.Region.fix == "SPS" & is.na(rel_pred), rel_pred.sps, rel_pred)
) %>%
select(-ends_with(".sps")) # Remove temporary columns
rm(temp)
Since the abundance data span up to 2020, the size estimates from 2011 to 2017 (Brood year) are assumed to remain constant after 2010 which is the last year with size estimations. An example of this is shown in the plot below:
stock_abundance_full <-stock_abundance%>%
arrange(StockLongName,Season.run,Age)%>%
group_by(StockLongName,Season.run,Age)%>%
fill(fit, se.fit, ref_mean, rel_pred, .direction = 'down')%>%
ungroup()%>%
arrange(StockLongName, Year.run, Shelton.TimeStep, Age)
ggplot(stock_abundance_full[stock_abundance_full$Season.run == '.Spr',], aes(x = Year.run, y = fit, color = as.factor(Age)))+
geom_line()+
facet_wrap(~StockLongName, ncol = 4)
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
###Calculate lipid content
Three lipid indexes were calculated:
lipid_content_f refers to lipids calculated from
predicted lengths (fit),
lipid_content_t refers to lipids calculated from the
reference mean lengths (ref_mean).
lipid_content_c refers to lipids calculated assuming
that the lengths of the fish have not changed from the first brood year
1980 (contant_length).
The indexes were calculated according to the lipid ranking and the lenght-lipid relationship parameters found in O’neill et al., 2014:
For ‘high´lipid ranking \[\text{kcal}^{-1} = 1.8034e^{-05}*(\text{length})^{3.0796}\] For ’medium’ lipid ranking \[\text{kcal}^{-1} = 1.1051e^{-05}*(\text{length})^{3.122}\] For ‘low’ lipid ranking \[\text{kcal}^{-1} = 7.2074e^{-06}*(\text{length})^{3.143}\]
Then, the TOTAL lipid content (predicted, theoretical and constant) was calculated by multiplying the lipid context index by the abundance.
constantlengthref <- stock_abundance_full %>%
select(StockLongName,Year.run, Age, fit)%>%
rename(constant_length = fit)%>%
distinct(StockLongName, Year.run,Age, .keep_all = TRUE)%>%
group_by(StockLongName,Age)%>%
filter(Year.run == min(Year.run))%>%
ungroup()%>%
select(-Year.run)
stock_abundance_full <- stock_abundance_full%>%
left_join(constantlengthref, by = c('StockLongName','Age'))%>%
mutate(lipid_content_f = case_when(Lipid.Ranking.fix == "high" ~ lipid_ranking_params$a[1]*fit^(lipid_ranking_params$b[1]),
Lipid.Ranking.fix == "medium" ~ lipid_ranking_params$a[2]*fit^(lipid_ranking_params$b[2]),
Lipid.Ranking.fix == "low" ~ lipid_ranking_params$a[3]*fit^(lipid_ranking_params$b[3])),
lipid_content_t = case_when(Lipid.Ranking.fix == "high" ~ lipid_ranking_params$a[1]*ref_mean^(lipid_ranking_params$b[1]),
Lipid.Ranking.fix == "medium" ~ lipid_ranking_params$a[2]*ref_mean^(lipid_ranking_params$b[2]),
Lipid.Ranking.fix == "low" ~ lipid_ranking_params$a[3]*ref_mean^(lipid_ranking_params$b[3])),
lipid_content_c = case_when(Lipid.Ranking.fix == "high" ~ lipid_ranking_params$a[1]*constant_length^(lipid_ranking_params$b[1]),
Lipid.Ranking.fix == "medium" ~ lipid_ranking_params$a[2]*constant_length^(lipid_ranking_params$b[2]),
Lipid.Ranking.fix == "low" ~ lipid_ranking_params$a[3]*constant_length^(lipid_ranking_params$b[3])),
total_lipid_content_f = lipid_content_f*StartCohort,
total_lipid_content_t = lipid_content_t*StartCohort,
total_lipid_content_c = lipid_content_c*StartCohort
)
ggplot(stock_abundance_full[stock_abundance_full$Season.run == '.Spr' & stock_abundance_full$Age >3 ,],aes(x=Year.run, color = as.factor(Age)))+
geom_line(aes(y=lipid_content_f), linetype = "solid", size =0.7)+
geom_line(aes(y=lipid_content_c), linetype = "dashed", size=0.7)+
theme_minimal()+
labs(title = "Predicted and Constant (Red) lipid content over time")+
facet_wrap(~StockLongName, ncol = 5)
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
temp <- stock_abundance_full%>%
group_by(StockName, Season.run, Year.run, Age)%>%
summarise(sum_total_lipid_content_f = sum(total_lipid_content_f),
sum_total_lipid_content_c = sum(total_lipid_content_c))%>%
ungroup()
## `summarise()` has grouped output by 'StockName', 'Season.run', 'Year.run'. You
## can override using the `.groups` argument.
ggplot(temp[temp$Season.run == '.Spr' & temp$Age >3,], aes (x = Year.run, color = as.factor(Age)))+
geom_line(aes( y = sum_total_lipid_content_f), linetype = 'solid', size = 0.7)+
geom_line(aes( y = sum_total_lipid_content_c), linetype = 'dashed', size = 0.7)+
theme_minimal()+
facet_wrap(~StockName, ncol = 3, scales = 'free')